%pyspark
from utilTool import *
import sys

"""
Uncomment the statement below for a production run; it stays commented out by default for debug runs.
"""
#instanceId = sys.argv[1]

"""
The following is the code block that the user actually sees.
"""

# Input table (use dollar sign to get input data stream).
sourceTable = "dataset.l_53_1614145758763_m"
# Output table. When debugging, change this name; otherwise the save step may
# fail with a "table already exists" error.
targetTable = "pipeline.solid_demo_temp_table"
# Comma-separated list of columns to keep; '_record_id_' is a necessary column.
featureCols = "_record_id_,xx,yy"
cols = featureCols.split(",")

# 1. Prepare the Spark session environment.
spark = getOrCreateSparkSession()

# 2. Read the source table from Greenplum (helper provided by utilTool).
dataset = readFromGreenPlum(spark, sourceTable)

# 3. Process the data: keep only the requested columns.
newDataset = dataset.select(cols)

# 4. Store the processed data into the Greenplum target table.
saveTableForGreenPlum(newDataset, targetTable)

# 5. Build the result information and show it.
ret = getResultMeta(
    [targetTable],
    {"feature_cols": featureCols},
    0,
    "success",
)

print(ret)


"""
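Save the output table's meta information to the designated location.
Keep this disabled during interactive debugging (it raises an error there); enable it for production execution.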
"""
#saveMetaForMysql(getResultMeta([targetTable], {"feature_cols": featureCols}, 0, "success"), instanceId)
#spark.stop()

